library(ggplot2)
package 㤼㸱ggplot2㤼㸲 was built under R version 3.5.3
library(ggpubr)
Loading required package: magrittr
package 㤼㸱magrittr㤼㸲 was built under R version 3.5.3
library(CDM)
package 㤼㸱CDM㤼㸲 was built under R version 3.5.3Loading required package: mvtnorm
package 㤼㸱mvtnorm㤼㸲 was built under R version 3.5.3**********************************
** CDM 7.3-17 (2019-03-18 18:33:40)      
** Cognitive Diagnostic Models  **
**********************************
library(boot)
package 㤼㸱boot㤼㸲 was built under R version 3.5.3
Attaching package: 㤼㸱boot㤼㸲

The following object is masked from 㤼㸱package:lattice㤼㸲:

    melanoma
library(tidyverse)
package 㤼㸱tidyverse㤼㸲 was built under R version 3.5.3-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v tibble  2.1.3     v purrr   0.3.2
v tidyr   0.8.3     v dplyr   0.8.3
v readr   1.3.1     v stringr 1.4.0
v tibble  2.1.3     v forcats 0.4.0
package 㤼㸱tibble㤼㸲 was built under R version 3.5.3package 㤼㸱tidyr㤼㸲 was built under R version 3.5.3package 㤼㸱readr㤼㸲 was built under R version 3.5.3package 㤼㸱purrr㤼㸲 was built under R version 3.5.3package 㤼㸱dplyr㤼㸲 was built under R version 3.5.3package 㤼㸱stringr㤼㸲 was built under R version 3.5.3package 㤼㸱forcats㤼㸲 was built under R version 3.5.3-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x tidyr::extract()   masks magrittr::extract()
x dplyr::filter()    masks stats::filter()
x dplyr::lag()       masks stats::lag()
x purrr::set_names() masks magrittr::set_names()
library(stringi)
package 㤼㸱stringi㤼㸲 was built under R version 3.5.3
library(stringr)

Read Data

rm(list = ls())

df.X <- read_csv("C:\\Users\\rkm160630\\OneDrive - The University of Texas at Dallas\\Ritesh Folder\\PhD\\FirstYearProject\\data\\Exam1Trial1_X.csv") %>% mutate_all(as.integer)
Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.
df.Q <- read_csv("C:\\Users\\rkm160630\\OneDrive - The University of Texas at Dallas\\Ritesh Folder\\PhD\\FirstYearProject\\data\\Exam1Trial1_Q.csv") %>% mutate_all(as.integer)
Parsed with column specification:
cols(
  Q_UNIQUE_ID = col_double(),
  `1` = col_double(),
  `2` = col_double(),
  `3` = col_double(),
  `4` = col_double()
)
X = df.X %>% select(-SubjectID) %>% rename_all(function(x) paste0("Q",x))
Q = df.Q %>% select(-Q_UNIQUE_ID) %>% rename_all(function(x) paste0("S",x))
X
# A tibble: 74 x 285
      Q1    Q2    Q3    Q4    Q5    Q6    Q7    Q8    Q9   Q10   Q11   Q12   Q13   Q14   Q15   Q16   Q17   Q18   Q19   Q20   Q21   Q22   Q23
   <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
 1    NA    NA    NA    NA    NA    NA     1    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 2    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA     1    NA    NA    NA    NA    NA    NA     0    NA    NA    NA    NA
 3    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 4    NA     0    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA     1    NA    NA    NA    NA    NA    NA    NA
 5    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA     0    NA    NA    NA    NA    NA     1    NA    NA    NA    NA    NA
 6    NA    NA    NA    NA     1    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 7    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA     1     1    NA    NA    NA    NA    NA
 8    NA    NA    NA    NA    NA    NA     1    NA    NA     1    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 9    NA    NA    NA    NA    NA     1    NA    NA    NA    NA    NA    NA    NA    NA    NA     0    NA    NA    NA    NA    NA    NA    NA
10    NA     1    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
# ... with 64 more rows, and 262 more variables: Q24 <int>, Q25 <int>, Q26 <int>, Q27 <int>, Q28 <int>, Q29 <int>, Q30 <int>, Q31 <int>,
#   Q32 <int>, Q33 <int>, Q36 <int>, Q37 <int>, Q38 <int>, Q39 <int>, Q40 <int>, Q41 <int>, Q42 <int>, Q43 <int>, Q44 <int>, Q45 <int>,
#   Q46 <int>, Q47 <int>, Q48 <int>, Q49 <int>, Q50 <int>, Q51 <int>, Q52 <int>, Q53 <int>, Q55 <int>, Q56 <int>, Q57 <int>, Q60 <int>,
#   Q61 <int>, Q62 <int>, Q63 <int>, Q64 <int>, Q65 <int>, Q66 <int>, Q67 <int>, Q68 <int>, Q69 <int>, Q70 <int>, Q71 <int>, Q72 <int>,
#   Q73 <int>, Q74 <int>, Q75 <int>, Q76 <int>, Q77 <int>, Q78 <int>, Q79 <int>, Q80 <int>, Q81 <int>, Q82 <int>, Q83 <int>, Q84 <int>,
#   Q85 <int>, Q86 <int>, Q87 <int>, Q88 <int>, Q89 <int>, Q90 <int>, Q91 <int>, Q92 <int>, Q93 <int>, Q94 <int>, Q95 <int>, Q96 <int>,
#   Q97 <int>, Q98 <int>, Q99 <int>, Q100 <int>, Q101 <int>, Q102 <int>, Q103 <int>, Q104 <int>, Q105 <int>, Q106 <int>, Q107 <int>,
#   Q108 <int>, Q109 <int>, Q110 <int>, Q111 <int>, Q112 <int>, Q113 <int>, Q114 <int>, Q115 <int>, Q116 <int>, Q117 <int>, Q118 <int>,
#   Q119 <int>, Q120 <int>, Q121 <int>, Q122 <int>, Q124 <int>, Q126 <int>, Q127 <int>, Q128 <int>, Q129 <int>, Q131 <int>, ...
Q
# A tibble: 285 x 4
      S1    S2    S3    S4
   <int> <int> <int> <int>
 1     1     0     0     0
 2     1     0     0     0
 3     1     0     0     0
 4     1     0     0     0
 5     1     0     0     0
 6     0     1     0     0
 7     1     0     0     0
 8     1     0     0     0
 9     1     0     0     0
10     1     0     0     0
# ... with 275 more rows
n=220
#X <- X %>% sample_n(size = n, replace = FALSE)

Student Performance

Bootstrap Sample

Boot requires a custom function that would return row wise stats. In case of dataframe, each row is considered as a multi-variate observation. First parameter of the custom function takes the data input and second parameter takes the importance of data given current bootstrap iteration. stype defines second parameter.




X <- as.data.frame(X) 
Q <- as.data.frame(Q) 




#model_dina <- gdina(data =  data.ecpe$data[,-1] , q.matrix =  data.ecpe$q.matrix, maxit = 10, rule = "GDINA2")

model_dina <- din(data = X, q.matrix =  Q)
---------------------------------------------------------------------------------
DINA MODEL 
** 2019-12-26 19:41:32 
---------------------------------------------------------------------------------
Iter.  1  :  19:41:32 ,   loglike= -929.4941  / max. param. ch. :  0.8  / relative deviance change :  1
Iter.  2  :  19:41:32 ,   loglike= -524.3309  / max. param. ch. :  0.995431  / relative deviance change :  0.772724
Iter.  3  :  19:41:32 ,   loglike= -505.2168  / max. param. ch. :  1  / relative deviance change :  0.037833
Iter.  4  :  19:41:32 ,   loglike= -495.3953  / max. param. ch. :  1  / relative deviance change :  0.019826
Iter.  5  :  19:41:32 ,   loglike= -491.0158  / max. param. ch. :  1  / relative deviance change :  0.008919
Iter.  6  :  19:41:33 ,   loglike= -489.1351  / max. param. ch. :  1  / relative deviance change :  0.003845
Iter.  7  :  19:41:33 ,   loglike= -487.719  / max. param. ch. :  1  / relative deviance change :  0.002904
Iter.  8  :  19:41:33 ,   loglike= -486.5123  / max. param. ch. :  1  / relative deviance change :  0.00248
Iter.  9  :  19:41:33 ,   loglike= -485.3022  / max. param. ch. :  0.163778  / relative deviance change :  0.002493
Iter.  10  :  19:41:33 ,   loglike= -483.5494  / max. param. ch. :  0.242179  / relative deviance change :  0.003625
Iter.  11  :  19:41:33 ,   loglike= -481.1056  / max. param. ch. :  0.175754  / relative deviance change :  0.00508
Iter.  12  :  19:41:33 ,   loglike= -479.7618  / max. param. ch. :  0.072064  / relative deviance change :  0.002801
Iter.  13  :  19:41:33 ,   loglike= -479.2588  / max. param. ch. :  1  / relative deviance change :  0.00105
Iter.  14  :  19:41:34 ,   loglike= -478.9472  / max. param. ch. :  0.021284  / relative deviance change :  0.000651
Iter.  15  :  19:41:34 ,   loglike= -478.7271  / max. param. ch. :  0.019872  / relative deviance change :  0.00046
Iter.  16  :  19:41:34 ,   loglike= -478.554  / max. param. ch. :  0.019841  / relative deviance change :  0.000362
Iter.  17  :  19:41:34 ,   loglike= -478.4088  / max. param. ch. :  0.019203  / relative deviance change :  0.000303
Iter.  18  :  19:41:34 ,   loglike= -478.2826  / max. param. ch. :  0.018833  / relative deviance change :  0.000264
Iter.  19  :  19:41:34 ,   loglike= -478.169  / max. param. ch. :  0.025832  / relative deviance change :  0.000237
Iter.  20  :  19:41:34 ,   loglike= -478.0623  / max. param. ch. :  0.034803  / relative deviance change :  0.000223
Iter.  21  :  19:41:34 ,   loglike= -477.9562  / max. param. ch. :  0.045335  / relative deviance change :  0.000222
Iter.  22  :  19:41:35 ,   loglike= -477.844  / max. param. ch. :  0.055548  / relative deviance change :  0.000235
Iter.  23  :  19:41:35 ,   loglike= -477.72  / max. param. ch. :  0.061434  / relative deviance change :  0.00026
Iter.  24  :  19:41:35 ,   loglike= -477.5862  / max. param. ch. :  0.058311  / relative deviance change :  0.00028
Iter.  25  :  19:41:35 ,   loglike= -477.4584  / max. param. ch. :  0.045567  / relative deviance change :  0.000268
Iter.  26  :  19:41:35 ,   loglike= -477.3559  / max. param. ch. :  0.029143  / relative deviance change :  0.000215
Iter.  27  :  19:41:35 ,   loglike= -477.2823  / max. param. ch. :  0.015927  / relative deviance change :  0.000154
Iter.  28  :  19:41:35 ,   loglike= -477.2277  / max. param. ch. :  0.008017  / relative deviance change :  0.000114
Iter.  29  :  19:41:36 ,   loglike= -477.1832  / max. param. ch. :  0.00562  / relative deviance change :  9.3e-05
Iter.  30  :  19:41:36 ,   loglike= -477.1449  / max. param. ch. :  0.005007  / relative deviance change :  8e-05
Iter.  31  :  19:41:36 ,   loglike= -477.111  / max. param. ch. :  0.004446  / relative deviance change :  7.1e-05
Iter.  32  :  19:41:36 ,   loglike= -477.0807  / max. param. ch. :  0.003937  / relative deviance change :  6.4e-05
Iter.  33  :  19:41:36 ,   loglike= -477.0533  / max. param. ch. :  0.003481  / relative deviance change :  5.7e-05
Iter.  34  :  19:41:36 ,   loglike= -477.0286  / max. param. ch. :  0.003144  / relative deviance change :  5.2e-05
Iter.  35  :  19:41:36 ,   loglike= -477.0062  / max. param. ch. :  0.002847  / relative deviance change :  4.7e-05
Iter.  36  :  19:41:36 ,   loglike= -476.9858  / max. param. ch. :  0.002585  / relative deviance change :  4.3e-05
Iter.  37  :  19:41:36 ,   loglike= -476.9672  / max. param. ch. :  0.002352  / relative deviance change :  3.9e-05
Iter.  38  :  19:41:37 ,   loglike= -476.9503  / max. param. ch. :  0.002145  / relative deviance change :  3.6e-05
Iter.  39  :  19:41:37 ,   loglike= -476.9348  / max. param. ch. :  0.001961  / relative deviance change :  3.2e-05
Iter.  40  :  19:41:37 ,   loglike= -476.9206  / max. param. ch. :  0.001797  / relative deviance change :  3e-05
Iter.  41  :  19:41:37 ,   loglike= -476.9076  / max. param. ch. :  0.00165  / relative deviance change :  2.7e-05
Iter.  42  :  19:41:37 ,   loglike= -476.8957  / max. param. ch. :  0.001519  / relative deviance change :  2.5e-05
Iter.  43  :  19:41:37 ,   loglike= -476.8847  / max. param. ch. :  0.001401  / relative deviance change :  2.3e-05
Iter.  44  :  19:41:37 ,   loglike= -476.8746  / max. param. ch. :  0.001294  / relative deviance change :  2.1e-05
Iter.  45  :  19:41:37 ,   loglike= -476.8653  / max. param. ch. :  0.001198  / relative deviance change :  2e-05
Iter.  46  :  19:41:37 ,   loglike= -476.8568  / max. param. ch. :  0.001111  / relative deviance change :  1.8e-05
Iter.  47  :  19:41:37 ,   loglike= -476.8488  / max. param. ch. :  0.001032  / relative deviance change :  1.7e-05
Iter.  48  :  19:41:38 ,   loglike= -476.8415  / max. param. ch. :  0.00096  / relative deviance change :  1.5e-05
Iter.  49  :  19:41:38 ,   loglike= -476.8347  / max. param. ch. :  0.000895  / relative deviance change :  1.4e-05
Iter.  50  :  19:41:38 ,   loglike= -476.8284  / max. param. ch. :  0.000835  / relative deviance change :  1.3e-05
Iter.  51  :  19:41:38 ,   loglike= -476.8226  / max. param. ch. :  0.00078  / relative deviance change :  1.2e-05
Iter.  52  :  19:41:38 ,   loglike= -476.8171  / max. param. ch. :  0.000729  / relative deviance change :  1.1e-05
Iter.  53  :  19:41:38 ,   loglike= -476.812  / max. param. ch. :  0.000694  / relative deviance change :  1.1e-05
Iter.  54  :  19:41:38 ,   loglike= -476.8073  / max. param. ch. :  0.000671  / relative deviance change :  1e-05
---------------------------------------------------------------------------------
Time difference of 6.584092 secs

Calculate item parameters


total_cdm_fn <- function(df, i = 1:dim(df)[1]) {
  
  #browser()
  #df.t <- X
  df.t <- df[i,]
  
  df.t.s1 <- df.t #%>% select(E1:E28) 

  ###########################
  
  df.cdm <- CDM::din(df.t.s1, Q, progress=FALSE)
 
  
   df.t1 <- tibble("value" = df.cdm$slip$est, "key" = paste0("s1_slip_E", seq(1,length(df.cdm$slip$est),1))) %>% 
     spread(key = "key", value = "value") %>% 
     select(!!!paste0("s1_slip_E", seq(1,length(df.cdm$slip$est),1)))
 
  
  df.t2 <- tibble("value" = df.cdm$guess$est, "key" = paste0("s1_guess_E", seq(1,length(df.cdm$guess$est),1))) %>% 
    spread(key = "key", value = "value") %>% 
    select(!!!paste0("s1_guess_E", seq(1,length(df.cdm$guess$est),1)))
  
  
  df.t <- cbind(df.t1, df.t2)
  
 return(as.matrix(df.t))
    
} 

t_cdm <- total_cdm_fn(X)

t_cdm[,1]
s1_slip_E1 
         0 
X.p1 <- X %>% sample_frac(size = 0.5)
X.p2 <- X %>% anti_join(X.p1)
Joining, by = c("Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10", "Q11", "Q12", "Q13", "Q14", "Q15", "Q16", "Q17", "Q18", "Q19", "Q20", "Q21", "Q22", "Q23", "Q24", "Q25", "Q26", "Q27", "Q28", "Q29", "Q30", "Q31", "Q32", "Q33", "Q36", "Q37", "Q38", "Q39", "Q40", "Q41", "Q42", "Q43", "Q44", "Q45", "Q46", "Q47", "Q48", "Q49", "Q50", "Q51", "Q52", "Q53", "Q55", "Q56", "Q57", "Q60", "Q61", "Q62", "Q63", "Q64", "Q65", "Q66", "Q67", "Q68", "Q69", "Q70", "Q71", "Q72", "Q73", "Q74", "Q75", "Q76", "Q77", "Q78", "Q79", "Q80", "Q81", "Q82", "Q83", "Q84", "Q85", "Q86", "Q87", "Q88", "Q89", "Q90", "Q91", "Q92", "Q93", "Q94", "Q95", "Q96", "Q97", "Q98", "Q99", "Q100", "Q101", "Q102", "Q103", "Q104", "Q105", "Q106", "Q107", "Q108", "Q109", "Q110", "Q111", "Q112", "Q113", "Q114", "Q115", "Q116", "Q117", "Q118", "Q119", "Q120", "Q121", "Q122", "Q124", "Q126", "Q127", "Q128", "Q129", "Q131", "Q132", "Q134", "Q135", "Q136", "Q137", "Q138", "Q139", "Q140", "Q141", "Q142", "Q143", "Q144", "Q145", "Q146", "Q147", "Q148", "Q149", "Q150", "Q151", "Q152", "Q154", "Q155", "Q156", "Q157", "Q158", "Q159", "Q160", "Q161", "Q162", "Q163", "Q164", "Q165", "Q166", "Q167", "Q168", "Q169", "Q170", "Q171", "Q172", "Q174", "Q175", "Q176", "Q177", "Q178", "Q179", "Q180", "Q181", "Q182", "Q183", "Q185", "Q186", "Q187", "Q188", "Q190", "Q191", "Q192", "Q193", "Q194", "Q195", "Q196", "Q197", "Q198", "Q199", "Q200", "Q201", "Q202", "Q204", "Q205", "Q206", "Q207", "Q208", "Q209", "Q210", "Q211", "Q212", "Q213", "Q214", "Q215", "Q216", "Q217", "Q218", "Q219", "Q220", "Q221", "Q222", "Q223", "Q224", "Q225", "Q226", "Q227", "Q229", "Q230", "Q231", "Q232", "Q233", "Q234", "Q235", "Q236", "Q238", "Q240", "Q241", "Q242", "Q243", "Q245", "Q246", "Q247", "Q248", "Q249", "Q250", "Q251", "Q252", "Q254", "Q255", "Q256", "Q257", "Q258", "Q259", "Q260", "Q261", "Q262", "Q263", "Q265", "Q266", "Q267", "Q269", "Q270", "Q271", "Q272", "Q273", "Q274", "Q275", "Q276", "Q277", "Q278", "Q279", "Q280", "Q281", "Q282", "Q283", "Q284", "Q286", "Q287", "Q288", "Q289", "Q291", "Q292", "Q293", "Q295", "Q297", "Q298", "Q299", "Q300", "Q301", "Q302", "Q303", "Q304", "Q305", "Q306", "Q307", "Q308", "Q309", "Q310")
X.bt.1 <- boot(data = X , statistic = total_cdm_fn, R = 1000, stype = "i")
X.bt.2 <- boot(data = X , statistic = total_cdm_fn, R = 1000, stype = "i")
NaNs producedNaNs producedNaNs produced
plot(X.bt.1)



#unique(df.data$mastery_n)

size = dim(X)[2]

df.s1_slip <- X.bt.1$t[,1:size] %>%  as_tibble() %>% mutate(parameter = "Slip", group = "Partition 1")
`as_tibble.matrix()` requires a matrix with column names or a `.name_repair` argument. Using compatibility `.name_repair`.
This warning is displayed once per session.
df.s1_guess <- X.bt.1$t[,(size+1):56] %>% as_tibble() %>% mutate(parameter = "Guess", group = "Partition 1")


df.s2_slip <- X.bt.2$t[,1:size] %>%  as_tibble() %>% mutate(parameter = "Slip", group = "Partition 2")
df.s2_guess <- X.bt.2$t[,(size+1):56] %>% as_tibble() %>% mutate(parameter = "Guess", group = "Partition 2")


df.data.sim <- df.s1_slip %>% bind_rows(df.s1_guess) %>% bind_rows(df.s2_slip) %>% bind_rows(df.s2_guess) 
df.data.sim
NA
NA
df.plot <- df.data.sim
col_names <- colnames(df.data.sim %>% select(-group)) %>% str_replace("V","Q")
colnames(df.data.sim) <- colnames(df.data.sim) %>% str_replace("V","Q")

# For viewing purposes only
df.plot %>% gather(key = "questions", value="item_parameters", -group, -parameter) %>% 
  mutate(questions = factor(
    str_replace(questions, "V","Q"), 
    levels = col_names, 
    ordered = TRUE)) %>% 
  group_by(group, parameter, questions) %>% 
  summarise( sampling_mean = mean(item_parameters), sampling_error = sqrt(sum((item_parameters - sampling_mean)^2)/(n() - 1)) )
NA
df.plot <- df.plot %>% gather(key = "questions", value="item_parameters", -group, -parameter) %>% 
  mutate(questions = factor(str_replace(questions, "V","Q"), levels = col_names, ordered = TRUE)) %>% 
  group_by(group, parameter, questions) %>% 
  summarise( sampling_mean = mean(item_parameters), 
             sampling_error = sqrt(sum((item_parameters - sampling_mean)^2)/(n() - 1)) ) %>% 
  ungroup() 

df.plot %>%
  
  ggplot() + aes(x=questions, y = sampling_mean, color = parameter) + facet_grid(group ~ .  )  +geom_pointrange( aes(ymin=sampling_mean - sampling_error, 
                                                                                                                          ymax=sampling_mean + sampling_error), width=.2) + geom_point(size = 1) + theme(strip.text.y = element_text(angle = 0)) + ggtitle("Item Parameters estimated over 1000 Bootstrap samples")
Ignoring unknown parameters: width


df.plot  %>% 
  
  ggplot() + aes(x=questions, y = sampling_mean, color = group) + facet_grid(parameter ~ questions, scales = "free_x"  )  +
  
  geom_pointrange(position=position_dodge(width=0.5), aes(ymin=sampling_mean - sampling_error, ymax=sampling_mean + sampling_error), width=.2) +
  
  geom_point(position=position_dodge(width=0.5), size = 1) + theme(strip.text.y = element_text(angle = 0)) + ggtitle("Item Parameters estimated over 1000 Bootstrap samples") + theme(axis.text.x = element_text() , strip.background.x = element_rect(fill = "white"), axis.ticks.x = element_blank()) + labs(x="Questions / Items", y = "Sampling Mean with Sampling error") + scale_color_brewer(palette="Dark2")
Ignoring unknown parameters: width


df.plot %>% select(-sampling_error) %>% spread(key = group, value = "sampling_mean") %>%
  
  ggplot() + aes(x=`Partition 1`, y = `Partition 2`) + geom_text(aes(label = questions)) + geom_abline(aes(intercept = 0, slope = 1)) + facet_wrap(parameter~., scales = "free") + ggtitle(paste("Student sample size is", n/2, "for each partition"))


df.data.sim %>% group_by(group) %>% mutate(id=1:n()) %>% ungroup() %>% 
  gather(key = "key", value = "value", -parameter, -group, -id) %>% 
  arrange(id, group, parameter) %>% filter(parameter == "Guess") %>%
  
  ggplot() + aes(x = key, y = value, fill = group) + geom_boxplot() + facet_wrap(.~parameter, scales = "free") + coord_flip()

NA
NA
NA
---
title: "CDM Demo"
output: html_notebook
---


```{r}
library(ggplot2)
library(ggpubr)
library(CDM)
library(boot)
library(tidyverse)
library(stringi)
library(stringr)
```

# Read Data
```{r}
rm(list = ls())

df.X <- read_csv("C:\\Users\\rkm160630\\OneDrive - The University of Texas at Dallas\\Ritesh Folder\\PhD\\FirstYearProject\\data\\Exam1Trial1_X.csv") %>% mutate_all(as.integer)
df.Q <- read_csv("C:\\Users\\rkm160630\\OneDrive - The University of Texas at Dallas\\Ritesh Folder\\PhD\\FirstYearProject\\data\\Exam1Trial1_Q.csv") %>% mutate_all(as.integer)


X = df.X %>% select(-SubjectID) %>% rename_all(function(x) paste0("Q",x))
Q = df.Q %>% select(-Q_UNIQUE_ID) %>% rename_all(function(x) paste0("S",x))

```

```{r paged.print=FALSE}
X
```

```{r paged.print=FALSE}
Q
```

```{r}
n=220
#X <- X %>% sample_n(size = n, replace = FALSE)
```

# Student Performance


# Bootstrap Sample 

Boot requires a custom function that would return row wise stats. In case of dataframe, each row is considered as a multi-variate observation. First parameter of the custom function takes the data input and second parameter takes the importance of data given current bootstrap iteration. stype defines second parameter.



```{r}



X <- as.data.frame(X) 
Q <- as.data.frame(Q) 




#model_dina <- gdina(data =  data.ecpe$data[,-1] , q.matrix =  data.ecpe$q.matrix, maxit = 10, rule = "GDINA2")

model_dina <- din(data = X, q.matrix =  Q)

```



# Calculate item parameters

```{r}

total_cdm_fn <- function(df, i = 1:dim(df)[1]) {
  
  #browser()
  #df.t <- X
  df.t <- df[i,]
  
  df.t.s1 <- df.t #%>% select(E1:E28) 

  ###########################
  
  df.cdm <- CDM::din(df.t.s1, Q, progress=FALSE)
 
  
   df.t1 <- tibble("value" = df.cdm$slip$est, "key" = paste0("s1_slip_E", seq(1,length(df.cdm$slip$est),1))) %>% 
     spread(key = "key", value = "value") %>% 
     select(!!!paste0("s1_slip_E", seq(1,length(df.cdm$slip$est),1)))
 
  
  df.t2 <- tibble("value" = df.cdm$guess$est, "key" = paste0("s1_guess_E", seq(1,length(df.cdm$guess$est),1))) %>% 
    spread(key = "key", value = "value") %>% 
    select(!!!paste0("s1_guess_E", seq(1,length(df.cdm$guess$est),1)))
  
  
  df.t <- cbind(df.t1, df.t2)
  
 return(as.matrix(df.t))
    
} 

t_cdm <- total_cdm_fn(X)

t_cdm[,1]
```

```{r}
X.p1 <- X %>% sample_frac(size = 0.5)
X.p2 <- X %>% anti_join(X.p1)

X.bt.1 <- boot(data = X , statistic = total_cdm_fn, R = 1000, stype = "i")
X.bt.2 <- boot(data = X , statistic = total_cdm_fn, R = 1000, stype = "i")


plot(X.bt.1)
```

```{r}


#unique(df.data$mastery_n)

size = dim(X)[2]

df.s1_slip <- X.bt.1$t[,1:size] %>%  as_tibble() %>% mutate(parameter = "Slip", group = "Partition 1")
df.s1_guess <- X.bt.1$t[,(size+1):56] %>% as_tibble() %>% mutate(parameter = "Guess", group = "Partition 1")


df.s2_slip <- X.bt.2$t[,1:size] %>%  as_tibble() %>% mutate(parameter = "Slip", group = "Partition 2")
df.s2_guess <- X.bt.2$t[,(size+1):56] %>% as_tibble() %>% mutate(parameter = "Guess", group = "Partition 2")


df.data.sim <- df.s1_slip %>% bind_rows(df.s1_guess) %>% bind_rows(df.s2_slip) %>% bind_rows(df.s2_guess) 
df.data.sim


```

```{r}
df.plot <- df.data.sim
col_names <- colnames(df.data.sim %>% select(-group)) %>% str_replace("V","Q")
colnames(df.data.sim) <- colnames(df.data.sim) %>% str_replace("V","Q")

# For viewing purposes only
df.plot %>% gather(key = "questions", value="item_parameters", -group, -parameter) %>% 
  mutate(questions = factor(
    str_replace(questions, "V","Q"), 
    levels = col_names, 
    ordered = TRUE)) %>% 
  group_by(group, parameter, questions) %>% 
  summarise( sampling_mean = mean(item_parameters), 
             sampling_error = sqrt(sum((item_parameters - sampling_mean)^2)/(n() - 1)),
             sampling_variance = sum((item_parameters - sampling_mean)^2)/(n() - 1))

```


```{r}
df.plot <- df.plot %>% gather(key = "questions", value="item_parameters", -group, -parameter) %>% 
  mutate(questions = factor(str_replace(questions, "V","Q"), levels = col_names, ordered = TRUE)) %>% 
  
  group_by(group, parameter, questions) %>% 
  
  summarise( sampling_mean = mean(item_parameters), 
             sampling_error = sqrt(sum((item_parameters - sampling_mean)^2)/(n() - 1)),
             sampling_variance = sum((item_parameters - sampling_mean)^2)/(n() - 1)
             ) %>% 
  ungroup() 
```


```{r}

df.plot %>%
  
  ggplot() + aes(x=questions, y = sampling_mean, color = parameter) + facet_grid(group ~ .  )  +geom_pointrange( aes(ymin=sampling_mean - sampling_error, 
                                                                                                                          ymax=sampling_mean + sampling_error), width=.2) + geom_point(size = 1) + theme(strip.text.y = element_text(angle = 0)) + ggtitle("Item Parameters estimated over 1000 Bootstrap samples")


```




```{r}

df.plot  %>% 
  
  ggplot() + aes(x=questions, y = sampling_mean, color = group) + facet_grid(parameter ~ questions, scales = "free_x"  )  +
  
  geom_pointrange(position=position_dodge(width=0.5), aes(ymin=sampling_mean - sampling_error, ymax=sampling_mean + sampling_error), width=.2) +
  
  geom_point(position=position_dodge(width=0.5), size = 1) + theme(strip.text.y = element_text(angle = 0)) + ggtitle("Item Parameters estimated over 1000 Bootstrap samples") + theme(axis.text.x = element_text() , strip.background.x = element_rect(fill = "white"), axis.ticks.x = element_blank()) + labs(x="Questions / Items", y = "Sampling Mean with Sampling error") + scale_color_brewer(palette="Dark2")


```


```{r}

df.plot %>% select(-sampling_error) %>% spread(key = group, value = "sampling_mean") %>%
  
  ggplot() + aes(x=`Partition 1`, y = `Partition 2`) + geom_text(aes(label = questions)) + geom_abline(aes(intercept = 0, slope = 1)) + facet_wrap(parameter~., scales = "free") + ggtitle(paste("Student sample size is", n/2, "for each partition"))

```

```{r}

df.data.sim %>% group_by(group) %>% mutate(id=1:n()) %>% ungroup() %>% 
  gather(key = "key", value = "value", -parameter, -group, -id) %>% 
  mutate(id = paste(id,key)) %>% select(-key) %>% arrange(id, group, parameter) %>% 
  spread(key= "group", value = "value") %>% separate(id,c("id","question"), " " ) %>% select(-id)  %>%
  
  ggplot() + aes(x = `Partition 1`, y = `Partition 2`, color = question) + geom_point(show.legend = FALSE) + facet_wrap(.~parameter, scales = "free")



```

```{r}

df.data.sim %>% group_by(group) %>% mutate(id=1:n()) %>% ungroup() %>% 
  gather(key = "key", value = "value", -parameter, -group, -id) %>% 
  arrange(id, group, parameter) %>% filter(parameter == "Guess") %>%
  
  ggplot() + aes(x = key, y = value, fill = group) + geom_boxplot() + facet_wrap(.~parameter, scales = "free") + coord_flip()



```



